package B2dsc::Controller::B2dsc;
# by lhtk : lhtk80t7@gmail.com
use Mojo::Base 'Mojolicious::Controller';

use autodie qw/open close/;

use Bio::SeqIO;
use Scalar::Util qw/looks_like_number/;
use File::Path qw/remove_tree/;
use File::Basename;

use JSON qw/to_json from_json/;
use ShareSubs qw/get_conf load_done update_done get_seq_href json2href init_conf_file/; # 和 Faidx 共用的一些函数

# -----------------------------------------------
# mod_perl
our $home_dir = $ENV{MOJO_HOME};
$home_dir = '.' unless (defined $home_dir and -d $home_dir);

# 干脆顶层全用 our 得了，这些变量似乎都在子函数中有调用
our $conf_dir = join '/', $home_dir, 'conf/b2dsc';
our $conf_dir_species = "$conf_dir/species";

# 先读基础配置
my $main_conf = join '/', $conf_dir, 'b2dsc.json';
our $kp_conf_file = join '/', $conf_dir, "known_probes.json";
# 2019.1.25
init_conf_file($main_conf);
init_conf_file($kp_conf_file);
our $conf = get_conf($main_conf);
our $data_dir = join '/', $home_dir, $conf->{data_dir};
# web 可直接访问，注意必然是在 public 父目录下的
our $public_tmp_dir = join '/', $home_dir, $conf->{public_tmp_dir}; 
our $days_kept = $conf->{days_kept}; # 保存天数

sub index {
    my $self = shift;
    # 应该不需要读 known_probes
    my $default_species;
    if (exists $conf->{default_species} and (grep { $conf->{default_species} eq $_ } species_available())) {
        load_conf_of_species($conf->{default_species});
        $default_species = $conf->{default_species};
    }
    $self->render(conf => $conf, species_available => [ species_available() ], default_species => $default_species);
}

# 主要工作：分析、收集 post 过来的参数，新建 job，然后提交给 blastn2chrs.pl 运行下一步。
#
# 1. 旧的 job 在执行此函数的时候被删除。
# 2. 有 id 重复直接 return
# 3. 是否有需要虑除的序列。
# 4. 参数等信息写入 job 对应目录下的 done.json 文件。
sub blastn {
    my $self = shift;

    remove_old_jobs();
    # load_conf_of_all_species(); # 2019.7.14 注释掉了
    
    # Check if parameters have been submitted
    my $validation = $self->validation;
    return $self->render('b2dsc/index', conf => $conf) unless $validation->has_data;

    my $min_seq_length = $conf->{min_seq_length};
    my $max_num_of_seq = int($conf->{max_num_of_seq}); # 最大允许序列数

    # -----------------------------------
    # querySeq
    # -----------------------------------
    $validation->required('querySeq')->like(qr/^[[:ascii:]]+$/);
    return $self->render('wrong_params', msg => 'Enter Query Sequence: You entered strange characters! Must be ASCII code!') if $validation->has_error;

    # 前面检查过了非 ascii 字符
    # 2018.2.8
    my $filter_invalid_fasta_seq = sub {
        my $fa_str_ref = shift;
        my $good_seqs = '';
        for my $seq (split />/, $$fa_str_ref) {
            next if $seq =~ /^\s*$/;
            my($header_line, $content) = split /\n/, $seq, 2;
            my $sid = (split /\s+/, $header_line)[0];
            next if $sid =~ /^\s*$/; # 无合适 ID
            next if $content =~ /[^a-zA-Z\s]/; # 里面有奇怪字符
            $good_seqs .= ">$seq\n";
        }
        $$fa_str_ref = $good_seqs;
    };

    my $sequences = $self->param('querySeq');
    $sequences = ">tmpSeq1\n" . $sequences unless $sequences =~ /^>/;
    $filter_invalid_fasta_seq->(\$sequences); # 2018.2.8
    # 万一并没有从网页输入
    if ((not defined $sequences) or (length($sequences) == 0)) {
        $sequences = 'NA';
        return $self->render(template => 'b2dsc/no_valid_dna_seq', seq => $self->param('querySeq'), min_length => $min_seq_length);
    }

    # 获得前 20 个序列的信息
    my $qseq_info = parse_seq_submitted({
        seqs => \$sequences,
        max_num_of_seq => $max_num_of_seq,
        min_seq_length => $min_seq_length,
    });
    # 没有合格的序列
    if ($qseq_info->{seq_cnt} == 0) {
        return $self->render(template => 'b2dsc/no_valid_dna_seq', seq => $self->param('querySeq'), min_length => $min_seq_length);
    }
    rm_ids_not_dup($qseq_info->{ids});
    # id 重复直接返回
    if (%{ $qseq_info->{ids} }) {
        return $self->render(template => 'b2dsc/duplicate_id', seq => $sequences, dup => $qseq_info->{ids});
    }
    $sequences = ${$qseq_info->{seqs}}; # 更新 $sequences

    # species, chromosomes
    # $validation->required('species')->in(keys %{ $conf->{species} });
    $validation->required('species')->in(species_available());
    return $self->render('wrong_params', msg => 'Please select a species!') if $validation->has_error;
    $validation->required('chrs[]');
    return $self->render('wrong_params', msg => 'Please select at least one chromosome!') if $validation->has_error;
    my $species = $self->param('species');
    load_conf_of_species($species); # 2019..14 添加
    my $chromosomes = join ',', @{ $self->every_param('chrs[]') };

    # blastn 参数，有默认值
    my $perc_identity = $self->param('perc_identity');
    $perc_identity = 50 unless isValidNumericParam($perc_identity, 0, 100);
    my $qcov_hsp_perc = $self->param('qcov_hsp_perc');
    $qcov_hsp_perc = 50 unless isValidNumericParam($qcov_hsp_perc, 0, 100);
    my $evalue = $self->param('evalue');
    $evalue = 10 unless (defined $evalue and looks_like_number($evalue));
    my $evalue_short = $self->param('evalue_short');
    $evalue_short = 1000 unless (defined $evalue_short and looks_like_number($evalue_short));
    my $word_size = $self->param('word_size');
    $word_size = 11 unless (defined $word_size and looks_like_number($word_size));
    my $word_size_short = $self->param('word_size_short');
    $word_size_short = 7 unless (defined $word_size_short and looks_like_number($word_size_short));
    my $dust = $self->param('dust');
    $dust = 0 unless (defined $dust);
    # 万一脚本 post 了
    $validation->required('specifiers[]');
    return $self->render('wrong_params', msg => 'Please select specifiers!') if $validation->has_error;
    my @specifiers = @{ $self->every_param('specifiers[]') };
    my @default_specifiers = grep {
        exists $conf->{ofmt_specifiers}{required}{$_}
        or
        exists $conf->{ofmt_specifiers}{optional}{$_}
    } split /\s+/, $conf->{ofmt};
    # die "@specifiers\n@default_specifiers\n"; # debug

    my %info; # raw ids 等，存入 done.json
    # 任务 id，根据 id 新建文件夹
    my $job_id = new_job(time); # 这里根据时间戳来得到不重复数字
    @info{qw/job_id species chromosomes/} = ($job_id, $species, $chromosomes);
    $info{status} = 'blastn'; # 当前进行中的任务
    @info{qw/perc_identity qcov_hsp_perc evalue evalue_short word_size word_size_short dust/} = ($perc_identity, $qcov_hsp_perc, $evalue, $evalue_short, $word_size, $word_size_short, $dust);
    $info{num_of_seqs} = $qseq_info->{seq_cnt};
    $info{raw_id}{qseq} = $qseq_info->{raw_id}; # seq2filter 也加进去
    $info{chromosome_length} = $conf->{species}{$species}{chromosome_length};
    # 不再往 done.json 里存，b2dsc.json 里有即可
    # if (exists $conf->{refSeq}{$species}) {
    #     $info{ref_seq} = $conf->{refSeq}{$species};
    # }
    $info{qseq_lens} = $qseq_info->{seq_lens};
    unless (join(' ', sort @specifiers) eq join(' ', sort @default_specifiers)) {
        # conf 里的 ofmt 必须设置正确，设置不正确就自己滚蛋吧。
        $info{ofmt} = $conf->{ofmt};
        # 删除被勾选取消的默认设定项
        for my $s (@default_specifiers) {
            unless (grep { $s eq $_ } @specifiers) {
                $info{ofmt} =~ s/\b$s\b//g;
            }
        }
        # 添上新勾选的设定项
        for my $s (@specifiers) {
            unless (grep { $s eq $_ } @default_specifiers) {
                $info{ofmt} .= " $s";
            }
        }
        # 去除多余空白
        $info{ofmt} =~ s/\s+/ /g;
    }

    # 将 raw_id 等信息存到 done.json 里
    update_done(\%info, $job_id, $public_tmp_dir);

    # 改变之后用于 blastn 的
    write_data2file(\$sequences, "$data_dir/$job_id/input.fa");

    # -----------------------------------
    # seq2filter
    # -----------------------------------
    my $bl2f = $self->param('BL2F');
    my $seq2filter = $self->param('seq2filter');
    $seq2filter = ">tmpFseq1\n" . $seq2filter unless $seq2filter =~ /^>/;
    $filter_invalid_fasta_seq->(\$seq2filter); # 2018.2.8
    # 没有勾选 BL2F 就直接跳过
    if (((defined $bl2f) and $bl2f) and ((defined $seq2filter) and (length($seq2filter) != 0))) {
        return $self->render('wrong_params', msg => 'Enter Sequence to Filter: You entered strange characters! Must be ASCII code!') unless ($seq2filter =~ /^[[:ascii:]]+$/);
        
        my $fseq_info = parse_seq_submitted({
            seqs => \$seq2filter,
            max_num_of_seq => 0,
            min_seq_length => 0,
        });
        if ($fseq_info->{seq_cnt} != 0) {
            rm_ids_not_dup($fseq_info->{ids});
            if (%{ $fseq_info->{ids} }) {
                return $self->render(template => 'b2dsc/duplicate_id', seq => $seq2filter, dup => $fseq_info->{ids});
            }
        }
        $info{raw_id}{fseq} = $fseq_info->{raw_id}; # 加进去
        write_data2file($fseq_info->{seqs}, "$data_dir/$job_id/seq2filter.fa");
    }
    $info{last_task} = 'none';
    update_done(\%info, $job_id, $public_tmp_dir);

    # system 才可以后台执行
    #
    # 什么情况下认为是足够相似，用同样的 blast results
    my $disable_SM = $self->param('disable_SM');
    my $pIdent_SM = $self->param('pIdent_SM');
    my $qCover_SM = $self->param('qCover_SM');

    if (defined $disable_SM and $disable_SM) {
        system "export MOJO_HOME=$home_dir; $home_dir/script/blastn2chrs.pl --jid $job_id --disable_SM &";
    } else {
        $pIdent_SM = 90 unless isValidNumericParam($pIdent_SM, 70, 100);
        $qCover_SM = 90 unless isValidNumericParam($qCover_SM, 70, 100);
        system "export MOJO_HOME=$home_dir; $home_dir/script/blastn2chrs.pl --jid $job_id --pIdent_SM $pIdent_SM --qCover_SM $qCover_SM &";
    }

    $self->redirect_to("/b2dsc/job_status?jid=$job_id");
}

sub isValidNumericParam {
    my($pVal, $min, $max) = @_;
    return 0 unless (defined $pVal);
    return 0 unless (looks_like_number($pVal));
    return 0 unless ($pVal >= $min and $pVal <= $max);
    return 1;
}

# 获取不重复的 job_id，并依此来建立文件夹
sub new_job {
    my $n = shift;
    # my $jid = sprintf "%x", $n;
    my $jid = sprintf "t%x", $n; # 加个 t，表示 tmp
    while (-e "$data_dir/$jid") {
        $n++;
        $jid = sprintf "t%x", $n;
    }
    mkdir $data_dir . '/' . $jid;

    # public/TMP 目录下同时新建一个
    my $public_job_dir = "$public_tmp_dir/$jid";
    mkdir $public_job_dir unless -d $public_job_dir;

    return $jid;
}

sub job_status {
    my $self = shift;

    # Check if parameters have been submitted
    my $validation = $self->validation;
    return $self->render('b2dsc/view_job', days_kept => $days_kept) unless $validation->has_data;
    # 必须包含 jid
    $validation->required('jid')->like(qr/^\w+$/);
    return $self->render('b2dsc/view_job', days_kept => $days_kept) if $validation->has_error;

    my $jid = $self->param('jid');

    # 直接整合到一个 done.json 文件里
    if (-d "$data_dir/$jid") {
        if ($jid !~ /^t/) {
            my $new_jid = clone_job($jid);
            return $self->redirect_to("/b2dsc/job_status?jid=$new_jid");
        }

        # my $done = load_done($jid, $data_dir);
        my $done = load_done($jid, $public_tmp_dir);

        # 序列全部被过滤掉了
        if ($done->{num_of_seqs} == 0) {
            my %id_map;
            while (my($k,$v) = each %{$done->{id_map}{seq2filter}}) {
                $id_map{ $done->{raw_id}{qseq}{$k} } = $done->{raw_id}{fseq}{$v};
            }
            return $self->render(template => 'b2dsc/no_seq_blasted2chrs', id_map => \%id_map);
        }

        my $pIqC = undef;

        # 已经运算完毕，不再自动刷新
        my $prog = $done->{status};

        $pIqC = (exists $done->{pIdent}) ? $done->{pIdent} . '_' . $done->{qCover} : 'NA';

        # my $refSeq = {};
        my $species = $done->{species};
        load_conf_of_species($species);
        # if (exists $done->{ref_seq}) {
        # if (exists $conf->{species}{$species}{refSeq}) {
        #     # $refSeq = $done->{ref_seq};
        #     $refSeq = $conf->{species}{$species}{refSeq};
        # }
        my $refSeq = $conf->{species}{$species}{refSeq} || {};
        # my $specifiers = $conf->{ofmt};
        my $specifiers = exists $done->{ofmt} ? $done->{ofmt} : $conf->{ofmt};
        $specifiers = [ split /\s+/, $specifiers ];
        shift @$specifiers;

        $self->render(jid => $jid, done => $done, days_kept => $days_kept, prog => $prog, pIqC => $pIqC, refSeq => $refSeq, specifiers => $specifiers);
    } else {
        $self->render(template => 'b2dsc/job_not_exist', jid => $jid, days_kept => $days_kept);
    }
}

# 注意 json 这个词应该已经被 mojo 占用了
# done.json, hsps.json
sub handle_ajax {
    my $self = shift;
    
    # Check if parameters have been submitted
    my $validation = $self->validation;
    # 2019.7.14 添加 blastdb
    $validation->required('category')->in(qw/done hsps Ns blastdb/);
    return $self->render(json => {}) if $validation->has_error;
    my $category = $self->param('category');

    # 2019.7.14
    if ($category eq 'blastdb') {
        $validation->required('species')->in(species_available());
        return $self->render(json => {}) if $validation->has_error;
        my $species = $self->param('species');
        load_conf_of_species($species); # 2019..14 添加
        return $self->render(json => $conf->{species}{$species}{blastdb}{chromosome});
    }

    if ($category eq 'Ns') {
        for my $i (qw/species gn/) {
            $validation->required($i)->like(qr/^\w+$/);
        }
        for my $i (qw/hgrp start end/) {
            $validation->required($i)->like(qr/^\d+$/);
        }
        return $self->render(json => {}) if $validation->has_error;
        my $species = $self->param('species');
        my $gn = $self->param('gn');
        my $hgrp = $self->param('hgrp');
        my $start = $self->param('start');
        my $end = $self->param('end');
        # 学到了一点新东西, jsonp 到底干嘛的
        my $jsonp = $self->param('jsoncallback');
        my $json_data = get_Ns($species, $hgrp, $gn, $start, $end);
        if ($jsonp and $jsonp =~ /^[a-zA-Z]\w*$/) {
            return $self->render(text => $jsonp . '(' . to_json($json_data) . ')', format => 'js');
        } else {
            return $self->render(json => $json_data);
        }
    }
    
    $validation->required('jid')->like(qr/^\w+$/);
    return $self->render(json => {}) if $validation->has_error;

    # done.json: raw_id, pIqC, filter_bls
    my $jid = $self->param('jid');
    return $self->render(json => {}) unless (-d "$public_tmp_dir/$jid");
    my $done = load_done($jid, $public_tmp_dir);

    if ($category eq 'done') {
        $validation->required('keys'); # 空格分开吧先
        return $self->render(json => {}) if $validation->has_error;
        my $ks = $self->param('keys');
        $done->{ofmt} = exists $done->{ofmt} ? $done->{ofmt} : $conf->{ofmt};
        return $self->render(json => get_data_from_done($done, $ks));
    } elsif ($category eq 'hsps') {
        for my $i ('sid', 'chr', 'n', 'pIqC') {
            $validation->required($i)->like(qr/^[\w.]+$/);
            return $self->render(json => {}) if $validation->has_error;
        }

        my $sid = $self->param('sid');
        my $chr = $self->param('chr');
        my $n = $self->param('n');
        my $pIqC = $self->param('pIqC');
        return $self->render(json => {}) unless exists $done->{filter_bls}{$pIqC};

        my $f = $home_dir . '/public/' . $done->{filter_bls}{$pIqC}{$sid}{hsps};
        # 每 Mbp 的 hsps 数据
        # 返回 hashref
        my $data_hsps = get_hsps($f, $n, $chr);

        my $task = $self->param('task');
        if (defined $task) { # 暂时保留，万一以后添加别的条件
            if ($task eq 'coords') {
                my $string_ref = coordinates4hsps($chr, $n, $data_hsps);
                return $self->render(text => $$string_ref, format => 'txt');
            }
        }

        # 每 Mbp 的 N
        my ($hgrp, $genome) = ($chr =~ /^(\d+)([a-zA-Z]\w*)$/);
        
        # n = 0 的问题
        my $json_data = { hsps => $data_hsps, nMbp => $n };
        $json_data->{N} = get_Ns($done->{species}, $hgrp, $genome, $n > 0 ? $n * 1e6 : 1, ($n+1)*1e6) if ($self->param('getNs'));
        
        my $jsonp = $self->param('jsoncallback');
        if ($jsonp and $jsonp =~ /^[a-zA-Z]\w*$/) {
            return $self->render(text => $jsonp . '(' . to_json($json_data) . ')', format => 'js');
        } else {
            return $self->render(json => $json_data);
        }
    }
}

# 从 done.js 中获取信息
sub get_data_from_done {
    my($done, $ks) = @_;
    my $data = {};

    my $species = $done->{species};
    load_conf_of_species($species);
    # 早期在 done.json 里保存了 refseq 信息，后来取消，直接放置到了 conf 文件里了
    $done->{ref_seq} = $conf->{species}{$species}{refSeq} || {};

    for my $k (split /\s+/, $ks) {
        if (exists $done->{$k}) {
            $data->{$k} = $done->{$k};
        }
    }

    return $data;
}

sub get_Ns {
    my($species, $hgrp, $gn, $start, $end) = @_;
    my @data;
    load_conf_of_species($species);
    my $ns = $conf->{species}{$species}{N};
    # 查不到
    unless (exists $ns->{chromosome}{$gn}{$hgrp}) {
        return 'No such species or chromosome.';
    }
    # start 可以等于 0
    return 'Bad start/end.' unless ($start >= 0 and $end >= $start);

    my $chr_len = $conf->{species}{$species}{chromosome_length}{$gn}[$hgrp-1];
    return "start > chromosome_length: $chr_len" if $start > $chr_len;

    my $file = join(
        '/', $ns->{dir},
        $ns->{chromosome}{$gn}{$hgrp}
    );
    open my $fh, '<', $file;
    # my @pre;
    while (<$fh>) {
        if (/^(\d+)\t(\d+)/) {
            my($s,$e) = ($1,$2);
            if ($e < $start) {
                next;
            } elsif ($s < $start and $e >= $start) {
                push @data, [$start, $e];
            } elsif ($s >= $start and $e <= $end) {
                push @data, [$s, $e];
            } elsif ($s <= $end and $e > $end) {
                push @data, [$s, $end];
                last;
            }
        }
    }
    close $fh;

    return \@data;
}

sub get_hsps {
    my ($f, $n, $chr) = @_;
    my $json_data = {};
    open my $fh, '<', $f;
    my $sseqid = 'A not exisit id, hei hei hei';
    while (<$fh>) {
        next if /^\s*$/;
        if (/^>(\w+)/) {
            $sseqid = $1;
        } elsif (/^(\d+) /) {
            if (($1 == $n) and ($sseqid eq $chr)) {
                s/^\d+\s+//;
                chomp;
                $json_data = from_json($_);
                last;
            }
        }
    }
    close $fh;
    
    return $json_data;
}

# 考虑是不是用某对参数跑过了
sub filter_bls {
    my $self = shift;

    my $jid = $self->param('jid');
    $jid = (defined $jid and ($jid ne '')) ? $jid : 'NA';

    my $job_dir = "$data_dir/$jid";
    unless (-d $job_dir) {
        $self->render(template => 'b2dsc/job_not_exist', jid => $jid, days_kept => $days_kept);
        return;
    }
    return $self->render(template => 'b2dsc/no_permission', jid => $jid) unless -o $job_dir;

    my $done = load_done($jid, $public_tmp_dir);
    # 其他操作正在进行中
    unless ($done->{status} eq 'free') {
        # status 大概是个占用掉的参数
        $self->render(template => 'b2dsc/running', jid => $jid, job_status => $done->{status});
        return;
    }

    my $pI = $self->param('pIdent');
    my $qC = $self->param('qCover');
    my $sid = $self->param('id4filter'); # all 或某个序列 id
                                    # all : All of them

    # if invalid, use default values
    $pI = (isValidNumericParam($pI, 0, 100)) ? $pI : 85;
    $qC = (isValidNumericParam($qC, 0, 100)) ? $qC : 80;
    $sid = (defined $sid and (exists $done->{raw_id}{qseq}{$sid})) ? $sid : 'All of them';
    my $pIqC = $pI . '_' . $qC;

    my $filtered = 0; # 是否已经用相同参数处理过了
    if (exists $done->{filter_bls}{$pIqC}) {
        if ($sid eq 'All of them') {
            if (join(',', sort keys %{ $done->{raw_id}{qseq} }) eq join(',', sort keys %{ $done->{filter_bls}{$pIqC} })) {
                $filtered = 1;
            }
        } elsif (exists $done->{filter_bls}{$pIqC}{$sid}) {
            $filtered = 1;
        }
    }

    $done->{pIdent} = $pI;
    $done->{qCover} = $qC;
    $done->{just_filtered} = $sid;
    unless ($filtered) {
        $done->{status} = 'filter blast results';
        update_done($done, $jid, $public_tmp_dir);
        if ($sid eq 'All of them') {
            system "export MOJO_HOME=$home_dir; $home_dir/script/filter_bls.pl --jid $jid --perc_identity $pI --qcov_hsp_perc $qC --all &";
        } else {
            system "export MOJO_HOME=$home_dir; $home_dir/script/filter_bls.pl --jid $jid --perc_identity $pI --qcov_hsp_perc $qC --id $sid &";
        }

        $self->redirect_to("/b2dsc/job_status?jid=$jid");
    } else {
        $done->{last_task} = "filter blast results";
        # 更新 done.json 里的 pIdent 和 qCover 信息
        update_done($done, $jid, $public_tmp_dir);
        $self->redirect_to("/b2dsc/job_status?jid=$jid");
    }
}

# 是否直接按照 job_id 转换来的十进制时间来删除？
# 按照上一次修改时间来删除
sub remove_old_jobs {
    my @dirs = glob "$data_dir/*";
    my @dirs_pub_tmp = glob "$public_tmp_dir/*";

    @dirs = (@dirs, @dirs_pub_tmp);

    return unless @dirs; # 空的，直接返回

    for my $d (@dirs) {
        my $jid = basename($d);
        next unless $jid =~ /^t/; # 只删除 tmp 的
        next unless -d $d; # 只删除旧目录
        next unless -O $d; # File is owned by real uid.
        
        # 按照修改时间？
        if ( time() - ( stat($d) )[9] > (60*60*24*$days_kept) ) {
            remove_tree($d);
        }
    }

    return 0;
}

# 输出用于 blast 的序列信息
# GET
sub get_seq {
    my $self = shift;

    my $jid = $self->param('jid');
    # 真的有人使坏
    # $jid = (defined $jid and $jid ne '' and ($jid =~ /^\w+$/)) ? $jid : 'NA';
    $jid = (defined $jid and ($jid =~ /^\w+$/)) ? $jid : 'NA';

    unless (-d "$data_dir/$jid") {
        $self->render(template => 'b2dsc/job_not_exist', jid => $jid, days_kept => $days_kept);
        return;
    }

    open(my $fh, '<', "$data_dir/$jid/input.fa");
    my $seqs;
    {
        local $/;
        $seqs = <$fh>;
    }
    close $fh;

    $self->render(jid => $jid, seqs => $seqs);
}

sub count2tsv {
    my $self = shift;
    my $validation = $self->validation;
    # 必须包含 jid
    $validation->required('jid')->like(qr/^\w+$/);
    return $self->render(text => 'Invalid job id.', format => 'txt') if $validation->has_error;
    my $jid = $self->param('jid');
    return $self->render(text => 'No such job.', format => 'txt') unless (-d "$public_tmp_dir/$jid");

    # $validation->required('sid', 'pIqC')->like(qr/^[\w.]+$/);
    # return $self->render(text => "Invalid heheheeh.", format => 'txt') if $validation->has_error;
    # 似乎新版的 mojo 只支持单元素 like ？ v7.21
    # 旧一些的 v6.15 没问题。
    # 还是这样保险
    for my $i (qw/sid pIqC/) {
        $validation->required($i)->like(qr/^[\w.]+$/);
        return $self->render(text => "Invalid $i.", format => 'txt') if $validation->has_error;
    }
    my $sid = $self->param('sid');
    my $pIqC = $self->param('pIqC');
    my $done = load_done($jid, $public_tmp_dir);
    return $self->render(text => "No such pIqC: $pIqC", format => 'txt') unless exists $done->{filter_bls}{$pIqC};
    return $self->render(text => "No such sid: $sid, or not filtered.", format => 'txt') unless exists $done->{filter_bls}{$pIqC}{$sid};

    my $f = $home_dir . '/public/' . $done->{filter_bls}{$pIqC}{$sid}{count};
    my $d_cnts = json2href($f);
    my $out = "# Position (Mbp): or better called 'range', e.g. '0' means in the range of 0–1 Mbp.\n";
    $out .= join("\t", 'Chromosome', 'Position (Mbp)', 'Number of HSPs') . "\n";
    for my $gn (sort keys %$d_cnts) {
        my $dgn = $d_cnts->{$gn};
        for my $hn (sort {$a <=> $b} keys %$dgn) {
            my $dghn = $dgn->{$hn};
            for my $n (sort {$a <=> $b} keys %$dghn) {
                $out .= join("\t", $hn . $gn, $n, $dghn->{$n}) . "\n";
            }
        }
    }
    return $self->render(text => $out, format => 'txt');
}

sub coordinates4hsps {
    my($chr, $n, $hsps) = @_;
    # $hsps = from_json($$hsps);
    my %strand = (plus => '+', minus => '-');
    my $out = "# Chromosome: $chr\n";
    $out .= join("\t", qw/Start End Strand/) . "\n";
    for my $s (sort keys %$hsps) { # s : strand
        my $ds = $hsps->{$s};
        for my $p100k (sort {$a <=> $b} keys %$ds) {
            for my $se (@{$ds->{$p100k}}) {
                $se->[1] += 1e5 if $se->[1] < $se->[0];
                my $start = $n*1e6 + $p100k*1e5 + $se->[0];
                my $end = $n*1e6 + $p100k*1e5 + $se->[1];
                $out .= join(
                    "\t", $start, $end, $strand{$s}
                ) . "\n";
            }
        }
    }
    return \$out;
}

sub num_of_chrs_in_a_group {
    my $species = shift;
    # our $conf;
    load_conf_of_species($species);
    my $n;

    my $href = $conf->{species}{$species}{blastdb}{chromosome};
    for my $g (keys %{ $href }) {
        $n = scalar keys %{ $href->{$g} };
        last;
    }

    return $n;
}

# 分析从网页提交过来的 sequences
#
# 参数：
#   seqs          : SCALAR ref. 序列为 FASTA 格式，DNA
#   max_num_of_seq: int. 最多接受序列数
#   min_seq_length: int. 最小序列长度
#
# 返回：
#   raw_id  : hashref. 原始序列 id
#   seqs    : SCALAR ref. 处理后符合条件的序列
#   ids     : hashref. id 散列，用于分析是否有 id 重复
#   seq_cnt : int. 符合条件的序列数目
#   seq_lens: hashref. 各序列的长度
#
# 事项：
#   1. 原序列 id 中非 '\w' 以及 '.' 的将替换为 '_'，原 id 保存到 desc
#   2. desc 同时记录序列长度
#
sub parse_seq_submitted {
    my $params = shift;
    
    # $seqs 采用引用，要求必须是 FASTA 格式的
    my($seqs, $max_num, $min_len) = @{ $params }{qw/seqs max_num_of_seq min_seq_length/};
    my %ids; # 万一 id 重复
    my %raw_id; # 记录新 id 和原 id 之间的对应关系
    my %seq_lens; # 2017.5.23

    # 检查是否是 dna 序列
    open my ($str_fh), '<', $seqs;
    my $stream = Bio::SeqIO->newFh(-format => 'Fasta', -fh => $str_fh);
    my $o_str;
    open my ($str_o_fh), '>', \$o_str;
    my $stream_o = Bio::SeqIO->newFh(-format => 'Fasta', -fh => $str_o_fh);
    my $seq_cnt = 0; # 序列计数

    while (my $seq = <$stream>) {
        my $len = $seq->length();
        if ($seq->alphabet eq 'dna' and $len >= $min_len) {
            $seq_cnt++;
            
            my $rid = $seq->id();  # raw id
            $ids{$rid}++;          # id 重复问题
            my $desc = "rid=$rid;length=" . $seq->length(); # 万一以前有 rid=xx 什么的
            $seq->desc($desc);

            my $sid = $rid;        # 复制一份
            $sid =~ s/[^\w.]/_/g;  # a-zA-Z0-9_. 之外的字符替换为 _
            # 可能会有替换后 sid 重复的问题
            if (exists $raw_id{$sid} and ($raw_id{$sid} ne $rid)) {
                my $n = 0;
                my $tid = $sid; # tmp id
                while (exists $raw_id{$tid}) {
                    $n++;
                    $tid = $sid . '.' . $n;
                }
                $sid = $tid;
            }
            $seq->id($sid);

            $raw_id{$sid} = $rid;
            $seq_lens{$sid} = $len;

            print $stream_o $seq;
        }
        # 是否设定了最大允许序列数
        last if $max_num > 0 and $seq_cnt == int($max_num);
    }
    close $str_fh;
    close $str_o_fh;

    return {
        raw_id => \%raw_id,
        seqs => \$o_str,
        ids => \%ids,
        seq_cnt => $seq_cnt,
        seq_lens => \%seq_lens,
    };
}

sub write_data2file {
    my($d, $fname) = @_;
    
    open my $fh, '>', $fname;
    print $fh $$d;
    close $fh;
}

sub rm_ids_not_dup {
    my $ids_href = shift;
    # id 重复问题
    while (my($k,$v) = each %{ $ids_href }) {
        if ($v == 1) {
            delete $ids_href->{$k};
        }
    }

    return $ids_href;
}

# 2019.1.4
# 变更了 conf 策略
sub load_conf_of_all_species {
    my @conf_files = <$conf_dir_species/*.json>;
    # 如果一个配置文件都没有，初始化一下
    init_conf_file("$conf_dir_species/example.json") unless @conf_files;
    for my $f (@conf_files) {
		my $species = basename($f, '.json');
		# $conf->{species}{$species} = get_conf(join '/', $conf_dir_species, "$species.json");
        load_conf_of_species($species);
	}

    return $conf;
}

# 2019.7.14
# 减少不必要的加载
sub species_available {
    my @conf_files = <$conf_dir_species/*.json>;
    # 如果一个配置文件都没有，初始化一下
    init_conf_file("$conf_dir_species/example.json") unless @conf_files;

    return map { basename($_, '.json') } @conf_files;
}

sub load_conf_of_species {
    my $species = shift;
    # our $conf;
    my $f = join '/', $conf_dir_species, "$species.json";
    $conf->{species}{$species} = get_conf($f);

    return $conf;
}

# -----------------------------------------------
sub known_probes {
    my $self = shift;

    $conf->{known_probes} = get_conf($kp_conf_file);

    my %info; # 注意不可以命名为 %data
    my %imgs;
    my $kp = $conf->{known_probes}; # known_probes
    for my $s (keys %$kp) {

        # 2018.5.9 - 改变策略，不再需要
        # 恢复，删掉多余的文件
        # restore_kept_job($jids{$s});

        my $done = load_done($kp->{$s}, $public_tmp_dir);
        my $in = Bio::SeqIO->new(
            -file => join('/', $data_dir, $kp->{$s}, 'input.fa'),
            -format => 'Fasta'
        );
        while (my $seq = $in->next_seq()) {
            my $id = $seq->id();
            my $raw_id = $done->{raw_id}{qseq}{$id};
            my $o_str;
            open my ($str_o_fh), '>', \$o_str;
            my $stream_o = Bio::SeqIO->newFh(-format => 'Fasta', -fh => $str_o_fh);
            print $stream_o $seq;
            close $str_o_fh;
            $info{$s}{$id}{seq} = $o_str;
            $info{$s}{$id}{rid} = $raw_id;
        }
    }

    $self->render(info => \%info, jids => $kp);
}

sub clone_job {
    my $jid = shift;
    my $done = load_done($jid, $public_tmp_dir);
    my $new_jid = new_job(time);
    symlink("../$jid/input.fa", "$data_dir/$new_jid/input.fa");
    update_done($done, $new_jid, $public_tmp_dir);

    return $new_jid;
}

# -----------------------------------------------
# docs
sub parameters {
    my $self = shift;
    $self->render;
}

sub about {
    my $self = shift;
    $self->render;
}

sub help {
    my $self = shift;
    $self->render(conf => $conf);
}

sub contact {
    my $self = shift;
    $self->render;
}

sub update_log {
    my $self = shift;
    $self->render;
}

# -----------------------------------------------
1;